import mlflow
import pandas as pd
def generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function: str = "common_features",
    main_note: str = "sizes_acts",
    note_to_compare: str = "sizes_L2_without_acts",
    group_type: str = "sim",
) -> pd.DataFrame:
    """Build a table comparing two run variants logged in MLflow.

    Every experiment in ``experiment_ids`` is queried with
    ``mlflow.search_runs``. A run is used only when its
    ``SAE_fusion_strategy`` param equals ``aggregation_function`` and its
    ``group_type`` param equals ``group_type``. Among those, runs whose
    ``note`` param equals ``main_note`` are treated as the "main" variant and
    runs whose ``note`` equals ``note_to_compare`` as the "compare" variant;
    all other runs are ignored.

    Runs are grouped by ``(dataset, embedding_dim, top_k)``. For each metric
    the result holds the "main" value and, when both variants are present,
    the relative difference ``(main - compare) / |compare| * 100`` under
    "% change". When several runs share the same key and variant, the one
    returned last by MLflow overwrites the earlier ones.

    Args:
        experiment_ids: Iterable of MLflow experiment IDs to scan.
        aggregation_function: Required value of the ``SAE_fusion_strategy``
            run param.
        main_note: ``note`` param value identifying the "main" runs.
        note_to_compare: ``note`` param value identifying the baseline runs.
        group_type: Required value of the ``group_type`` run param.

    Returns:
        A DataFrame with the key columns ``Dataset``, ``Dimensions`` and
        ``TopK`` followed by ``(metric, "main")`` / ``(metric, "% change")``
        columns, rounded to two decimals and sorted by the key columns.
        When no run matches, an empty DataFrame with only the three key
        columns is returned.
    """
    index_names = ["Dataset", "Dimensions", "TopK"]

    # (dataset, dim, topk) -> {(metric label, "main" | "compare"): value}
    records = {}
    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            output_format="list",
        )
        for run in runs:
            params = run.data.params
            if (
                params.get("SAE_fusion_strategy") != aggregation_function
                or params.get("group_type") != group_type
            ):
                continue

            note = params.get("note")
            if note == main_note:
                model_type = "main"
            elif note == note_to_compare:
                model_type = "compare"
            else:
                # Run belongs to neither of the two variants being compared.
                continue

            row_key = (
                params.get("dataset", f"Exp-{exp_id}"),
                int(params.get("embedding_dim", 0)),
                int(params.get("top_k", 0)),
            )
            run_metrics = run.data.metrics
            # NOTE(review): the "G/mean" label is filled from a ".../median"
            # metric - confirm whether the label or the metric name is the
            # intended one.
            records.setdefault(row_key, {}).update(
                {
                    ("G/mean", model_type): run_metrics.get("CommonItemsNDCG20/median"),
                    ("U/mean", model_type): run_metrics.get("NDCG20/mean"),
                    ("U/min", model_type): run_metrics.get("NDCG20/min"),
                    ("Pop", model_type): run_metrics.get("Popularity/mean"),
                }
            )

    if not records:
        # An empty frame has a single-level index, so assigning three index
        # names below would raise ValueError; return the empty shape directly.
        return pd.DataFrame(columns=index_names)

    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.names = index_names

    # Calculate % differences
    result_cols = []
    for metric in sorted({col[0] for col in df.columns}):
        main_col = (metric, "main")
        compare_col = (metric, "compare")
        percent_col = (metric, "% change")
        if main_col in df.columns and compare_col in df.columns:
            df[percent_col] = (
                (df[main_col] - df[compare_col]) / df[compare_col].abs()
            ) * 100
            result_cols.extend([main_col, percent_col])
        elif main_col in df.columns:
            result_cols.append(main_col)

    # Keep only main and percent change columns, sort them by metric
    df = df[result_cols]
    df = df.round(2)
    df = df.sort_index(axis=1, level=0).sort_values(by=index_names)
    return df.reset_index()
Normalized embeddings
Jedna z moznych veci, co muze v modelech nastat, je, ze velikost sparse embeddingu muze mit odlisnou distribuci mezi cleny skupiny. Jinymi slovy, nekdo muze mit vetsi hodnoty embeddingu nez nekdo jiny. Pri agregaci by to pote znamenalo, ze nekteri uzivatele budou vice ovlivnovat vysledny embedding nez jini. To muze byt problem, pokud chceme, aby vysledky byly fair pro vsechny uzivatele.
Pojdme nejdrive prozkoumat, zda takovy jev opravdu nastava. Koukneme se na distribuci sumy hodnot v embeddingu pres sample 5000 uzivatelu. Opet vezmeme stejny priklad jako minule tedy dimenzi 2048 a topk 64. Jak je videt na grafu, histogram sum embeddingu uzivatelu tvori normalni rozdeleni. Vsichni uzivatele tedy nemaji stejne hodnoty a normalizace by mohla pomoct udelat uzivatele fairnejsi.
Nyni se jeste pojdme podivat na graf pokud vypneme normalizaci. Jak je videt, zde uz se nejedna o ciste normalni rozdeleni, ale ocasek u vetsich hodnot je mnohem vyraznejsi. I zde by mohla normalizace pomoci, tak aby nebyli nekteri uzivatele vetsiho embeddingu preferovani oproti ostatnim.
Normalizace, kterou chceme pouzit, je vydelit embedding jeho L2 normou a vysledek pronasobit prumernou hodnotou embeddingu. Tedy normalizace bude vypadat takto:
\begin{equation*} \text{normalized\_embedding} = \frac{\text{embedding}}{\|\text{embedding}\|_2} \cdot \text{mean}(\text{embedding}) \end{equation*}

Podivejme se, jak tedy vypadaji normalizovana doporuceni. Nejdrive pro common features bez aktivace.
SAE group recommendation performance for common features aggregation function and similar groups
Comparison of base model and model with normalized embeddings
# Normalized vs. un-normalized embeddings for the "common_features"
# aggregation on similar ("sim") groups; the call is the cell's last
# expression so the notebook renders the returned table.
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids=experiment_ids,
    group_type="sim",
    aggregation_function="common_features",
    note_to_compare="sizes_L2_without_acts",
    main_note="sizes_L2_without_acts_normalized",
)
| | Dataset | Dimensions | TopK | G/mean % change | G/mean main | Pop % change | Pop main | U/mean % change | U/mean main | U/min % change | U/min main |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | LastFM1k | 1024 | 32 | 0.00 | 0.59 | 0.05 | 0.62 | -0.06 | 0.79 | -0.09 | 0.63 |
| 1 | LastFM1k | 1024 | 64 | 0.00 | 0.58 | -0.15 | 0.61 | 0.05 | 0.80 | -0.04 | 0.63 |
| 2 | LastFM1k | 1024 | 128 | 0.00 | 0.60 | 0.00 | 0.60 | 0.95 | 0.81 | 0.18 | 0.63 |
| 3 | LastFM1k | 2048 | 32 | 0.00 | 0.59 | -0.20 | 0.64 | -0.04 | 0.79 | 0.61 | 0.62 |
| 4 | LastFM1k | 2048 | 64 | 0.00 | 0.60 | 0.00 | 0.62 | 0.41 | 0.81 | 1.02 | 0.64 |
| 5 | LastFM1k | 2048 | 128 | 0.00 | 0.61 | 0.00 | 0.61 | 0.38 | 0.81 | -0.08 | 0.61 |
| 6 | LastFM1k | 4096 | 32 | 0.00 | 0.57 | 0.00 | 0.66 | 0.00 | 0.78 | -0.03 | 0.63 |
| 7 | LastFM1k | 4096 | 64 | 0.00 | 0.52 | 0.00 | 0.63 | -0.01 | 0.81 | -0.03 | 0.62 |
| 8 | LastFM1k | 4096 | 128 | 0.00 | 0.59 | 0.00 | 0.62 | -0.07 | 0.81 | -3.26 | 0.61 |
| 9 | MovieLens | 1024 | 32 | 0.00 | 0.58 | -1.02 | 0.51 | 0.86 | 0.65 | -0.20 | 0.52 |
| 10 | MovieLens | 1024 | 64 | -0.23 | 0.54 | 0.00 | 0.50 | 0.17 | 0.66 | -0.07 | 0.53 |
| 11 | MovieLens | 1024 | 128 | 0.00 | 0.65 | -0.59 | 0.49 | 0.14 | 0.67 | -0.21 | 0.51 |
| 12 | MovieLens | 2048 | 32 | 0.00 | 0.68 | 0.23 | 0.49 | -0.12 | 0.65 | -0.66 | 0.54 |
| 13 | MovieLens | 2048 | 64 | 0.00 | 0.45 | -0.98 | 0.48 | 0.18 | 0.65 | 0.04 | 0.53 |
| 14 | MovieLens | 2048 | 128 | 0.00 | 0.51 | 1.18 | 0.48 | 0.09 | 0.67 | 0.46 | 0.53 |
| 15 | MovieLens | 4096 | 32 | 0.87 | 0.67 | 0.43 | 0.49 | -0.00 | 0.66 | -1.32 | 0.54 |
| 16 | MovieLens | 4096 | 64 | 0.00 | 0.59 | 1.07 | 0.51 | 0.00 | 0.66 | 0.57 | 0.54 |
| 17 | MovieLens | 4096 | 128 | 0.34 | 0.57 | 0.02 | 0.49 | 0.48 | 0.65 | -0.35 | 0.51 |
Jak je videt, tato zmena je naprosto minimalni a nedochazi k zadne vyznamne zmene v doporucenich. Nyni se podivejme na average s aktivaci.
SAE group recommendation performance for average aggregation function and similar groups
Comparison of base model and model with normalized embeddings
# Normalized vs. un-normalized embeddings for the "average" aggregation on
# similar ("sim") groups; the call is the cell's last expression so the
# notebook renders the returned table.
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids=experiment_ids,
    group_type="sim",
    aggregation_function="average",
    note_to_compare="sizes_L2_with_acts",
    main_note="sizes_L2_with_acts_normalized",
)
| | Dataset | Dimensions | TopK | G/mean % change | G/mean main | Pop % change | Pop main | U/mean % change | U/mean main | U/min % change | U/min main |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | LastFM1k | 1024 | 32 | 0.00 | 0.52 | 0.92 | 0.61 | -0.04 | 0.81 | 1.76 | 0.65 |
| 1 | LastFM1k | 1024 | 64 | 0.00 | 0.56 | -0.28 | 0.61 | 0.05 | 0.81 | -2.11 | 0.63 |
| 2 | LastFM1k | 1024 | 128 | -0.24 | 0.57 | 0.01 | 0.61 | 0.15 | 0.81 | -0.21 | 0.65 |
| 3 | LastFM1k | 2048 | 32 | -0.48 | 0.58 | -0.02 | 0.62 | 0.04 | 0.82 | -1.85 | 0.63 |
| 4 | LastFM1k | 2048 | 64 | 0.00 | 0.58 | 0.11 | 0.61 | -0.33 | 0.82 | -1.47 | 0.66 |
| 5 | LastFM1k | 2048 | 128 | -0.37 | 0.64 | 0.71 | 0.60 | -0.06 | 0.82 | -3.46 | 0.64 |
| 6 | LastFM1k | 4096 | 32 | 0.00 | 0.59 | 1.41 | 0.62 | 0.20 | 0.82 | -0.10 | 0.65 |
| 7 | LastFM1k | 4096 | 64 | -0.76 | 0.58 | 0.50 | 0.61 | -0.03 | 0.82 | -1.16 | 0.64 |
| 8 | LastFM1k | 4096 | 128 | -0.29 | 0.62 | -0.11 | 0.61 | -0.67 | 0.81 | -0.80 | 0.64 |
| 9 | MovieLens | 1024 | 32 | -3.73 | 0.57 | 0.44 | 0.53 | -0.09 | 0.69 | -0.08 | 0.58 |
| 10 | MovieLens | 1024 | 64 | -7.74 | 0.69 | -0.14 | 0.54 | -0.53 | 0.69 | 0.36 | 0.58 |
| 11 | MovieLens | 1024 | 128 | 0.00 | 0.66 | 0.27 | 0.54 | -0.56 | 0.69 | -0.01 | 0.58 |
| 12 | MovieLens | 2048 | 32 | 8.12 | 0.71 | -0.82 | 0.54 | 0.05 | 0.69 | -0.14 | 0.58 |
| 13 | MovieLens | 2048 | 64 | -7.31 | 0.62 | -0.11 | 0.53 | -0.52 | 0.69 | -0.38 | 0.58 |
| 14 | MovieLens | 2048 | 128 | 0.00 | 0.70 | 1.32 | 0.53 | 0.18 | 0.69 | 0.20 | 0.58 |
| 15 | MovieLens | 4096 | 32 | 10.06 | 0.63 | -0.86 | 0.54 | 0.36 | 0.68 | 0.06 | 0.56 |
| 16 | MovieLens | 4096 | 64 | 6.14 | 0.73 | -0.64 | 0.54 | 1.35 | 0.69 | -1.36 | 0.58 |
| 17 | MovieLens | 4096 | 128 | 5.80 | 0.72 | 1.28 | 0.54 | -0.98 | 0.69 | -1.30 | 0.58 |